In [1]:
import numpy as np
import matplotlib.pyplot as pl
import seaborn as sn
import emcee
import pymilne
%matplotlib inline

This notebook shows how to extract information from a spectral line using the weak-field approximation. It uses a Bayesian approach to the problem.


Simple case

Let us generate an artificial spectral line in the weak-field regime and do the inference.


In [3]:
lambda0 = 6301.5080
JUp = 2.0
JLow = 2.0
gUp = 1.5
gLow = 1.833
lambdaStart = 6300.8
lambdaStep = 0.03
nLambda = 50

lineInfo = np.asarray([lambda0, JUp, JLow, gUp, gLow, lambdaStart, lambdaStep])

s = pymilne.milne(nLambda, lineInfo)


stokes = np.zeros((4,nLambda))

BField = 100.0
BTheta = 20.0
BChi = 0.0
VMac = 2.0
damping = 0.0
B0 = 0.8
B1 = 0.2
mu = 1.0
VDop = 0.085
kl = 5.0
modelSingle = np.asarray([BField, BTheta, BChi, VMac, damping, B0, B1, VDop, kl])
stokes = s.synth(modelSingle,mu)
wl = np.arange(nLambda)*lambdaStep + lambdaStart

In [4]:
f, ax = pl.subplots(ncols=2, nrows=1, figsize=(12,6))
ax = ax.flatten()
labels = ['I/I$_c$','V/I$_c$']
which = [0,3]
for i in range(2):
    ax[i].plot(wl, stokes[which[i],:])
    ax[i].set_xlabel('Wavelength [$\AA$]')
    ax[i].set_ylabel(labels[i])
    ax[i].ticklabel_format(useOffset=False)
pl.tight_layout()


In [5]:
sigma = 1e-4
stI = stokes[0,:] + sigma*np.random.randn(nLambda)
stV = stokes[3,:] + sigma*np.random.randn(nLambda)
diffI = np.gradient(stI, lambdaStep)

Let us check if the line is in the weak-field regime. For this, we plot Stokes V vs. the wavelength derivative of Stokes I and check if it is a straight line.

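In the weak-field regime, circular polarization is proportional to the wavelength derivative of the intensity (this is the standard weak-field relation; the numerical constant is the same one used in the code below):

$$ V(\lambda) \simeq -4.6686\times 10^{-13}\, \bar{g}\, \lambda_0^2\, B_\parallel\, \frac{dI}{d\lambda}, $$

with $\lambda_0$ in Å, $B_\parallel = B\cos\theta_B$ in G, and $\bar{g}$ the effective Landé factor of the line (the code below uses $\bar{g}=1.5$). If the approximation holds, plotting $V$ against $dI/d\lambda$ should therefore give a straight line through the origin.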

In [6]:
pl.plot(diffI, stV)
pl.xlabel('dI/d$\lambda$')
pl.ylabel('V/I$_c$')

We can now do a linear fit and obtain the longitudinal magnetic flux density. Remember that the fit has to be of the form $y=a\,x$ (no intercept). To do the fit, write down the $\chi^2$ and minimize it.

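Explicitly, writing $\chi^2(a) = \sum_i \left(V_i - a\, I_i'\right)^2$ and setting $d\chi^2/da = 0$ gives the estimator computed in the next cell:

$$ a = \frac{\sum_i V_i\, I_i'}{\sum_i I_i'^2}, \qquad B_\parallel = \frac{-a}{4.6686\times 10^{-13}\, \bar{g}\, \lambda_0^2}. $$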

In [8]:
coef = np.sum(stV*diffI)/np.sum(diffI**2)
print coef
print "Original BLOS={0:6.2f} Mx cm-2 - Inferred BLOS={1:6.2f} Mx cm-2".format(BField*np.cos(BTheta*np.pi/180.0),-coef / (4.6686e-13*6301.**2*1.5))


-0.00304243983052
Original BLOS= 93.97 Mx cm-2 - Inferred BLOS=109.43 Mx cm-2

Let us investigate the $\chi^2$ surface a little by sampling the posterior distribution with emcee.

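The quantity we sample is the log-posterior (with flat priors on $B \in [0,500]$ G and $\theta_B \in [0,180]^\circ$, as implemented in the class below):

$$ \ln p(B,\theta_B\,|\,\mathbf{V}) = -\sum_i \frac{\left( V_i + C\, \bar{g}\, \lambda_0^2\, B\cos\theta_B\; I_i' \right)^2}{2\sigma^2} + \mathrm{const}, \qquad C = 4.6686\times 10^{-13}. $$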

In [7]:
class linearFit(object):
    def __init__(self, diffI, V, noise):
        self.diffI = diffI
        self.V = V
        self.noise = noise
        self.lower = np.asarray([0.0, 0.0])
        self.upper = np.asarray([500.0, 180.0])

    def logPosterior(self, theta):
        # theta = [B, thetaB]; uniform priors inside the box defined by lower/upper
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) | (theta[1] < self.lower[1]) | (theta[1] > self.upper[1]) ):
            return -np.inf
        else:
            # weak-field model: V = -C * lambda0^2 * geff * B * cos(thetaB) * dI/dlambda
            model = -4.6686e-13 * 6301.0**2 * 1.5 * theta[0] * np.cos(theta[1] * np.pi/180.0) * self.diffI
            return -np.sum((self.V-model)**2 / (2.0*self.noise**2))

    def sample(self):        
        ndim, nwalkers = 2, 500
        self.theta0 = np.asarray([50.0,30.0])
        p0 = [self.theta0 + 0.01*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)

In [8]:
out = linearFit(diffI, stV, sigma)

In [9]:
out.sample()

In [10]:
BSamples = out.sampler.chain[:,-20:,0].flatten()
thBSamples = out.sampler.chain[:,-20:,1].flatten()

In [119]:
f, ax = pl.subplots(ncols=2, nrows=3, figsize=(12,8))
ax[0,0].plot(BSamples)
ax[0,1].hist(BSamples)
ax[0,0].set_ylabel('B')
ax[0,1].set_xlabel('B')
ax[1,0].plot(thBSamples)
ax[1,1].hist(thBSamples)
ax[1,0].set_ylabel(r'$\theta_B$')
ax[1,1].set_xlabel(r'$\theta_B$')
ax[2,0].plot(BSamples*np.cos(thBSamples*np.pi/180.0))
ax[2,1].hist(BSamples*np.cos(thBSamples*np.pi/180.0))
ax[2,0].set_ylabel(r'B*cos($\theta_B$)')
ax[2,1].set_xlabel(r'B*cos($\theta_B$)')
pl.tight_layout()
pl.savefig('weakSamplingSimple.png')



In [14]:
f, ax = pl.subplots(figsize=(12,10))
ax.hexbin(BSamples,thBSamples)
ax.set_xlabel('B [G]')
ax.set_ylabel(r'$\theta_B$ [deg]')



Biases

The maximum-likelihood solution can be written analytically when the problem is parameterized in terms of $B_\parallel$ and $B_\perp$. It is interesting to analyze the behavior of the $B_\perp$ estimator in the presence of noise. It is given by:

$$ B_\perp^2 = \frac{1}{C^2} \frac{\left[ \left(\sum_i Q_i I_i'' \right)^2+ \left(\sum_i U_i I_i'' \right)^2 \right]^{1/2}} {\sum_i I_i''^2} $$

Imagine the case of pure noise in Stokes $Q$ and $U$.

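A quick sketch of why this estimator is biased: for pure noise of standard deviation $\sigma$, the sums $\sum_i Q_i I_i''$ and $\sum_i U_i I_i''$ are independent zero-mean Gaussians with variance $\sigma^2 \sum_i I_i''^2$, so the modulus in the numerator follows a Rayleigh distribution and is strictly positive. Its expectation gives

$$ E\left[B_\perp^2\right] = \frac{1}{C^2}\, \sqrt{\frac{\pi}{2}}\, \frac{\sigma}{\left(\sum_i I_i''^2\right)^{1/2}} > 0, $$

so noise alone already produces a spurious transverse field.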

In [19]:
stI = stokes[0,:] + sigma*np.random.randn(nLambda)
stQ = sigma*np.random.randn(nLambda)
stU = sigma*np.random.randn(nLambda)
stIp = 6301.0**2*1.5*np.gradient(stI, lambdaStep)
stIpp = 0.25*6301.0**2*1.5*np.gradient(stIp, lambdaStep)

In [23]:
C = 4.67e-13
Bpar = np.zeros(500)
Bperp = np.zeros(500)
for i in range(500):
    stV = stokes[3,:] + sigma*np.random.randn(nLambda)
    stQ = stokes[1,:] + sigma*np.random.randn(nLambda)
    stU = stokes[2,:] + sigma*np.random.randn(nLambda)
    Bpar[i] = -1.0/C * np.sum(stV*stIp) / np.sum(stIp**2)
    Bperp[i] = np.sqrt(1.0/C**2 * np.sqrt( np.sum(stQ*stIpp)**2 + np.sum(stU*stIpp)**2 ) / np.sum(stIpp**2))
f, ax = pl.subplots(ncols=2, nrows=2, figsize=(9,9))
ax[0,0].hist(Bpar)
ax[0,1].hist(Bperp)
ax[1,0].hist(180.0/np.pi*np.arctan2(Bperp, Bpar))   # inferred inclination



Errors in both coordinates

In fact, there is always noise in both Stokes $V$ and Stokes $I$ (typically larger in Stokes $I$, which here enters through its derivative). Therefore, our model fitting has to take this into account.


In [78]:
lambda0 = 6301.5080
JUp = 2.0
JLow = 2.0
gUp = 1.5
gLow = 1.833
lambdaStart = 6300.8
lambdaStep = 0.015
nLambda = 100

lineInfo = np.asarray([lambda0, JUp, JLow, gUp, gLow, lambdaStart, lambdaStep])

s = pymilne.milne(nLambda, lineInfo)


stokes = np.zeros((4,nLambda))

BField = 100.0
BTheta = 20.0
BChi = 0.0
VMac = 2.0
damping = 0.0
B0 = 0.8
B1 = 0.2
mu = 1.0
VDop = 0.085
kl = 5.0
modelSingle = np.asarray([BField, BTheta, BChi, VMac, damping, B0, B1, VDop, kl])
stokes = s.synth(modelSingle,mu)
wl = np.arange(nLambda)*lambdaStep + lambdaStart

stokesNoise = np.copy(stokes)
sigmaI = 5e-3
sigmaV = 1e-3
stokesNoise[0,:] = stokes[0,:] + sigmaI*np.random.randn(nLambda)
stokesNoise[3,:] = stokes[3,:] + sigmaV*np.random.randn(nLambda)
diffI = np.gradient(stokesNoise[0,:], lambdaStep)

In [79]:
f, ax = pl.subplots(ncols=3, nrows=1, figsize=(15,6))
ax = ax.flatten()
labels = ['I/I$_c$','V/I$_c$']
which = [0,3]
for i in range(2):
    ax[i].plot(wl, stokesNoise[which[i],:])
    ax[i].plot(wl, stokes[which[i],:])
    ax[i].set_xlabel('Wavelength [$\AA$]')
    ax[i].set_ylabel(labels[i])
    ax[i].ticklabel_format(useOffset=False)
ax[2].plot(wl, diffI)
ax[2].set_xlabel('Wavelength [$\AA$]')
ax[2].set_ylabel('dI/d$\lambda$')
ax[2].ticklabel_format(useOffset=False)
pl.tight_layout()
sigmaDiffI = np.std(diffI[0:25])
print "Estimated noise in dIdl in units of sigma_I={0}".format(sigmaDiffI / sigmaI)


Estimated noise in dIdl in units of sigma_I=50.9989715506

In [120]:
f, ax = pl.subplots(figsize=(12,6))
ax.errorbar(diffI, stokesNoise[3,:], yerr=sigmaV, xerr=sigmaDiffI, fmt='.')



The exact value of the noise in the derivative depends on how the numerical derivative is carried out. For instance, for a simple forward-difference formula, $I_i' = (I_{i+1}-I_i)/\Delta\lambda$, error propagation gives

$$ \sigma_{I'}^2 = \frac{2}{\Delta \lambda^2}\, \sigma_I^2, $$

while np.gradient uses centered differences in the interior, $I_i' = (I_{i+1}-I_{i-1})/(2\Delta\lambda)$, for which $\sigma_{I'}^2 = \sigma_I^2/(2\Delta\lambda^2)$, i.e. $\sigma_{I'} \approx 47\,\sigma_I$ for $\Delta\lambda = 0.015$ Å, in agreement with the estimate above.

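As a quick numerical check (a minimal sketch, not part of the original analysis), we can differentiate pure noise with np.gradient and compare the measured noise amplification with the centered-difference prediction:

In [ ]:
# sketch: noise amplification of np.gradient applied to pure noise
noiseTest = sigmaI * np.random.randn(100000)
measured = np.std(np.gradient(noiseTest, lambdaStep)) / sigmaI
predicted = 1.0 / (lambdaStep * np.sqrt(2.0))
print "Measured amplification = {0:.1f} - predicted = {1:.1f}".format(measured, predicted)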
First, let us compute the maximum-likelihood solution neglecting errors in x. This is obtained just by:


In [117]:
coef = np.sum(stokesNoise[3,:]*diffI)/np.sum(diffI**2)
print "Estimated field = {0} G".format(coef / (-4.6686e-13*6301.**2*1.5))
print "Real field = {0} G".format(BField*np.cos(BTheta*np.pi/180.0))


Estimated field = 73.1653062299 G
Real field = 93.9692620786 G

Now we sample the distribution taking into account the presence of uncertainties on both axes. Following standard techniques, we assume that our measurement satisfies

$$ y_i = z_i + e_i $$

where $z_i$ is our predictor, characterized by a distribution $p_Z(z_i)$, and $e_i$ is the measurement uncertainty, characterized by the distribution $p_E(e_i)$. In that case, the distribution of $y_i$ is given by

$$ p(y_i) = \int dz_i p_Z(z_i) p_E(y_i-z_i) $$

In our case, we are assuming that $z_i$ is uncertain because the points at which it is evaluated are uncertain. Assume that the uncertainty in $x_i$ is Gaussian, so that

$$ p(x_i) = N(x_{i0},\sigma_x^2) $$

where $x_{i0}$ is the nominal value of $x_i$. In our case we know that $z_i=\alpha x_i$, because the two quantities follow a linear relationship, so $p_Z(z_i)$ follows from a simple change of variables:

$$ p_Z(z_i) = p_X(x_i) \left| \frac{dx_i}{dz_i} \right| = \frac{1}{|\alpha|}\, p_X\!\left( \frac{z_i}{\alpha} \right) $$

Taking this into account, the final likelihood function is

$$ p(y_i) = \frac{1}{\sqrt{2\pi \left(\sigma_y^2 + \alpha^2 \sigma_x^2 \right)}} \exp \left[ -\frac{1}{2} \frac{(y_i-\alpha x_i)^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} \right] $$

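Note that, because the variance now depends on $\alpha$, the Gaussian normalization cannot be dropped when writing the log-likelihood; this is the origin of the logarithmic term in the linearBothAxisFit class below:

$$ \ln p(\mathbf{y}|\alpha) = -\frac{1}{2} \sum_i \ln\left(\sigma_y^2 + \alpha^2 \sigma_x^2\right) - \frac{1}{2} \sum_i \frac{(y_i - \alpha x_i)^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} + \mathrm{const}. $$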
Another equivalent way of seeing this is to include the unobserved true values of the $x$ and $y$ points in the Bayesian problem and marginalize over them at the end:

$$ p(\mathbf{x},\mathbf{y},\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\mathbf{y},\alpha)\, p(\mathbf{x},\mathbf{y},\alpha) $$

$$ p(\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto \int d \mathbf{x}\, d \mathbf{y}\; p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\mathbf{y},\alpha)\, p(\mathbf{x},\mathbf{y},\alpha) $$

Given that we are assuming that $y=\alpha x$, we can remove $\mathbf{y}$ from the previous equations (using a Dirac delta prior) and finally compute

$$ p(\alpha|\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}) \propto \int d \mathbf{x}\; p(\mathbf{x}_\mathrm{obs},\mathbf{y}_\mathrm{obs}|\mathbf{x},\alpha)\, p(\mathbf{x},\alpha) $$

where the likelihood for each point is given by a 2D Gaussian distribution

$$ p(x_\mathrm{obs,i},y_\mathrm{obs,i}|\mathbf{x},\alpha) = \frac{1}{2\pi \sqrt{\sigma_x^2 \sigma_y^2}} \exp \left[ -\frac{1}{2} \left( \frac{(x-x_\mathrm{obs,i})^2}{\sigma_x^2} + \frac{(\alpha x-y_\mathrm{obs,i})^2}{\sigma_y^2} \right) \right] $$

If one computes the integral over $x$, the same result is obtained.

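For completeness, here is a sketch of that integral (assuming a flat prior on each $x_i$): completing the square in $x$ and integrating gives

$$ \int dx\, \frac{1}{2\pi \sigma_x \sigma_y} \exp \left[ -\frac{1}{2} \left( \frac{(x-x_\mathrm{obs,i})^2}{\sigma_x^2} + \frac{(\alpha x-y_\mathrm{obs,i})^2}{\sigma_y^2} \right) \right] = \frac{1}{\sqrt{2\pi \left(\sigma_y^2 + \alpha^2 \sigma_x^2 \right)}} \exp \left[ -\frac{1}{2} \frac{(y_\mathrm{obs,i}-\alpha x_\mathrm{obs,i})^2}{\sigma_y^2 + \alpha^2 \sigma_x^2} \right], $$

which is exactly the per-point likelihood written above and implemented in linearBothAxisFit below.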

In [98]:
class linearBothAxisFit(object):
    def __init__(self, diffI, V, noiseDiffI, noiseV):
        self.diffI = diffI
        self.V = V
        self.noiseDiffI = noiseDiffI
        self.noiseV = noiseV
        self.lower = np.asarray([-0.01])
        self.upper = np.asarray([0.01])

    def logPosterior(self, theta):
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) ):
            return -np.inf
        else:
            model = theta[0] * self.diffI
            # total variance per point: sigma_V^2 + alpha^2 * sigma_dIdl^2
            variance = self.noiseV**2 + theta[0]**2 * self.noiseDiffI**2
            return -0.5*self.V.size*np.log(variance) - 0.5 * np.sum((self.V-model)**2 / variance)

    def sample(self):        
        ndim, nwalkers = 1, 500
        self.theta0 = np.asarray([-0.005])
        p0 = [self.theta0 + 0.001*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)
        
class linearSingleAxisFit(object):
    def __init__(self, diffI, V, noiseV):
        self.diffI = diffI
        self.V = V        
        self.noiseV = noiseV
        self.lower = np.asarray([-0.01])
        self.upper = np.asarray([0.01])

    def logPosterior(self, theta):
        if ( (theta[0] < self.lower[0]) | (theta[0] > self.upper[0]) ):
            return -np.inf
        else:
            model = theta[0] * self.diffI
            variance = self.noiseV**2
            # the log-term is constant here but kept for symmetry with linearBothAxisFit
            return -0.5*self.V.size*np.log(variance) - 0.5 * np.sum((self.V-model)**2 / variance)

    def sample(self):        
        ndim, nwalkers = 1, 500
        self.theta0 = np.asarray([-0.005])
        p0 = [self.theta0 + 0.001*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        self.sampler.run_mcmc(p0, 300)

In [105]:
outBoth = linearBothAxisFit(diffI, stokesNoise[3,:], sigmaDiffI, sigmaV)
outBoth.sample()
samplesBoth = outBoth.sampler.chain[:,-20:,0].flatten() / (-4.6686e-13*6301.**2*1.5)

In [106]:
outSingle = linearSingleAxisFit(diffI, stokesNoise[3,:], sigmaV)
outSingle.sample()
samplesSingle = outSingle.sampler.chain[:,-20:,0].flatten() / (-4.6686e-13*6301.**2*1.5)

In [122]:
f, ax = pl.subplots(ncols=2, nrows=1, figsize=(15,6))
ax[0].plot(samplesBoth)
ax[0].plot(samplesSingle)
ax[1].hist(samplesBoth, bins=40, label='xy errors')
ax[1].hist(samplesSingle, bins=40, label='y errors')
ax[0].set_ylabel(r'B$_\parallel$ [G]')
ax[1].set_xlabel(r'B$_\parallel$ [G]')
ax[0].axhline(BField*np.cos(BTheta*np.pi/180.0))
ax[1].axvline(BField*np.cos(BTheta*np.pi/180.0))
pl.legend()


